Load the libraries and the data set.
# Install stringr only when it is missing. The package name must be a quoted
# string; an unquoted name makes R look for an object called `stringr`.
if (!requireNamespace("stringr", quietly = TRUE)) {
  install.packages("stringr")
}
library(tidyverse)
library(dplyr)
library(stringr)
# Read the books data; the result is not assigned to a variable here.
read_csv("data/books.csv")
Rename data set for research
Initial reading of data set
# Dimensions of books_info (number of rows, then number of columns).
dim(books_info)
[1] 11131 12
# Number of rows in books_info.
nrow(books_info)
[1] 11131
# Number of columns in books_info.
ncol(books_info)
[1] 12
# Column names of books_info.
names(books_info)
[1] "bookID" "title" "authors"
[4] "average_rating" "isbn" "isbn13"
[7] "language_code" "num_pages" "ratings_count"
[10] "text_reviews_count" "publication_date" "publisher"
Find missing values
# Count the missing values in every column of books_info.
# A formula lambda replaces the deprecated `funs()` helper, and the
# `select(everything())` step is dropped because it selects every column
# and so changes nothing.
books_info %>%
  summarise_all(~ sum(is.na(.)))
`funs()` is deprecated as of dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
Replace/remove missing values
Review/investigate data set
View the data set by looking only at author, average rating, rating count and publisher.
Arrange by average rating in descending order.
The above data shows that arranging by the average rating is misleading, as some of these results have been rated very few times and therefore have a higher average rating.
Let's arrange the same data set by ratings count, so that the average rating can be compared with the number of ratings to get a more reliable picture.
# Sort top_rated by ratings_count, highest first, and print the result.
top_rated %>%
  arrange(desc(ratings_count)) %>%
  print()
The data above lets us see the ratings count in descending order alongside the average rating.
Filter to only the results by author J.K. Rowling/Mary GrandPré.
Organise the above data by ratings count in descending order.
# Keep only the author, rating and publication-date columns of jkrowling.
jkrowling_books <- select(
  jkrowling,
  authors, average_rating, ratings_count, publication_date
)
# Display the selection sorted by ratings_count, highest first.
jkrowling_books %>%
  arrange(desc(ratings_count))
Arrange by average rating
# Show the author and rating columns sorted by average_rating, highest first.
# select() is piped into arrange() so the sort applies to the selected
# columns; without the pipe the selection is only printed and arrange() runs
# on the full jkrowling_books data frame instead.
jkrowling_books %>%
  select(authors, average_rating, ratings_count) %>%
  arrange(desc(average_rating))
Now let's look at only the books that have 2,000,000 or more ratings.
# Keep the rows with at least 2,000,000 ratings, retaining only the
# average_rating and ratings_count columns, then display the result.
jk_highest_rated <- jkrowling_books %>%
  filter(ratings_count >= 2000000) %>%
  select(average_rating, ratings_count)
jk_highest_rated
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKTG9hZCBpbiBsaWJyYXJ5cyAmIGRhdGEgc2V0LgoKCmBgYHtyfQoKYGBgCgpgYGB7cn0KaW5zdGFsbC5wYWNrYWdlcyhzdHJpbmdyKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShkcGx5cikKbGlicmFyeShzdHJpbmdyKQpyZWFkX2NzdigiZGF0YS9ib29rcy5jc3YiKQoKYGBgCgpSZW5hbWUgZGF0YSBzZXQgZm9yIHJlc2VhcmNoCgpgYGB7cn0KYm9va3NfaW5mbyA8LSByZWFkLmNzdigiZGF0YS9ib29rcy5jc3YiKQoKYm9va3NfaW5mbwpgYGAKCgpJbml0aWFsIHJlYWRpbmcgb2YgZGF0YSBzZXQKCmBgYHtyfQpkaW0oYm9va3NfaW5mbykKYGBgCgoKYGBge3J9Cm5yb3coYm9va3NfaW5mbykKYGBgCgpgYGB7cn0KbmNvbChib29rc19pbmZvKQpgYGAKCmBgYHtyfQpuYW1lcyhib29rc19pbmZvKQpgYGAKCkZpbmQgbWlzc2luZyB2YWx1ZXMgCgpgYGB7cn0KYm9va3NfaW5mbyAlPiUKICBzZWxlY3QoZXZlcnl0aGluZygpKSAlPiUKICBzdW1tYXJpc2VfYWxsKGZ1bnMoc3VtKGlzLm5hKC4pKSkpCmBgYAoKUmVwbGFjZS9yZW1vdmUgbWlzc2luZyB2YWx1ZXMKCmBgYHtyfQpib29rc19pbmZvMiA8LSBib29rc19pbmZvICU+JSAKICBkcm9wX25hKHJhdGluZ3NfY291bnQsIHRleHRfcmV2aWV3c19jb3VudCkKYm9va3NfaW5mbzIKYGBgCgpSZXZpZXcvaW52ZXN0aWdhdGUgZGF0YSBzZXQKClZpZXcgdGhlIGRhdGEgc2V0IGJ5IG9ubHkgbG9va2luZyBhdCBhdXRob3IsYXZlcmFnZSByYXRpbmcsIHJhdGluZyBjb3VudCBhbmQgcHVibGlzaGVyLgoKQXJyYW5nZSBvcmRlciBieSBhdmVyYWdlIHJhdGluZyBpbiBkZXNjCgpgYGB7cn0KdG9wX3JhdGVkIDwtIGJvb2tzX2luZm8yICU+JSAKICBzZWxlY3QoYXV0aG9ycywgYXZlcmFnZV9yYXRpbmcsIHJhdGluZ3NfY291bnQpCiAgYXJyYW5nZSh0b3BfcmF0ZWQsIGRlc2MoYXZlcmFnZV9yYXRpbmcpKQp0b3BfcmF0ZWQKYGBgClRoZSBhYm92ZSBkYXRhIHNob3dzIHRoYXQgYXJyYW5naW5mIGJ5IHRoZSBhdmVyYWdlIHJhdGluZyBpcyBtaXNzbGVhZGluZyBhcyBzb21lIG9mIHRoZXNlIHJlc3VsdHMgaGF2ZSBiZWVuIHJhdGVkIHZlcnkgZmV3IHRpbWVzIHRoZXJlZm9yZSBoYXZlIGEgaGlnaGVyIGF2ZXJhZ2UgcmF0aW5nLgoKTGV0cyBhcnJhbmdlIHRoZSBzYW1lIGRhdGEgc2V0IGJ5IHJhdGluZ3MgY291bnQgdG8gY29tcGFyZSB0aGUgYXZlcmFnZSByYXRpbmcgdG8gdGhlIG51bWJlciBvZiByYXRpbmdzIHRvIGdldCBhIGJldHRlciB2YWx1ZS4KCmBgYHtyfQphcnJhbmdlKHRvcF9yYXRlZCwgZGVzYyhyYXRpbmdzX2NvdW50KSkgJT4lIAogIHByaW50CmBgYApUaGUgZGF0YSBhYm92ZSBsZXRzIHVzIHNlZSB0aGUgYXZlcmFnZSByYXRpbmcgaW4gZGVzYyBvcmRlciBjb21wYXJlZCB0byB0aGUgYXZlcmFnZSByYXRpbmcuCgoKRmlsdGVyIG9ubHkgcmVzdWx0cyBieSBhdXRob3Igai5rIHJvd2xpbmcvbWFy
eSBncmFuZHByZQoKYGBge3J9Cmprcm93bGluZyA8LSBmaWx0ZXIoYm9va3NfaW5mbzIsIGF1dGhvcnMgPT0gIkouSy4gUm93bGluZy9NYXJ5IEdyYW5kUHLDqSIpCgpqa3Jvd2xpbmcKYGBgCgpPcmdhbmlzZSBhYm92ZSBkYXRlIGJ5IHJhdGluZyBjb3VudCBpbiBkZXNjIG9yZGVyCgpgYGB7cn0Kamtyb3dsaW5nX2Jvb2tzIDwtamtyb3dsaW5nICU+JSAKICBzZWxlY3QoYXV0aG9ycywgYXZlcmFnZV9yYXRpbmcsIHJhdGluZ3NfY291bnQpCmFycmFuZ2Uoamtyb3dsaW5nX2Jvb2tzLCBkZXNjKHJhdGluZ3NfY291bnQpKQpgYGAKCkFycmFuZ2UgYnkgYXZlcmFnZSByYXRpbmcgCgpgYGB7cn0Kamtyb3dsaW5nX2Jvb2tzICU+JSAKICBzZWxlY3QoYXV0aG9ycywgYXZlcmFnZV9yYXRpbmcsIHJhdGluZ3NfY291bnQpCmFycmFuZ2Uoamtyb3dsaW5nX2Jvb2tzLCBkZXNjKGF2ZXJhZ2VfcmF0aW5nKSkKYGBgCgpOb3cgbGV0cyBsb29rIGF0IHRoZSBkYXRhIHRoYXQgb25seSBoYXMgb3ZlciAyMDAwMDAwMCBvciBhYm92ZSByYXRpbmdzCgpgYGB7cn0KamtfaGlnaGVzdF9yYXRlZCA8LSBqa3Jvd2xpbmdfYm9va3MgJT4lIAogIHNlbGVjdChhdmVyYWdlX3JhdGluZywgcmF0aW5nc19jb3VudCkgJT4lIAogIGZpbHRlcihyYXRpbmdzX2NvdW50ID49IDIwMDAwMDApCmprX2hpZ2hlc3RfcmF0ZWQKYGBgCgo=